#!/usr/bin/env python

# Copyright 2016 The Fuchsia Authors
#
# Use of this source code is governed by a MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT

"""

This tool will symbolize a crash from Zircon's crash logger, adding
function names and, if available, source code locations (filenames and
line numbers from debug info).

Example usage #1:
  ./scripts/run-zircon -a x86 | ./scripts/symbolize devmgr.elf --build-dir=build-x86

Example usage #2:
  ./scripts/symbolize devmgr.elf --build-dir=build-x86
  <copy and paste output from Zircon>

Example usage #3 (for zircon kernel output):
  ./scripts/symbolize --build-dir=build-x86
  <copy and paste output from Zircon>


"""

import argparse
import errno
import os
import re
import subprocess
import sys

# Absolute path of the directory that contains this script.
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
# <parent of SCRIPT_DIR>/prebuilt/downloads; tool_path() builds toolchain
# binary paths underneath this directory.
PREBUILTS_BASE_DIR = os.path.abspath(
    os.path.join(SCRIPT_DIR, os.pardir, "prebuilt", "downloads"))
# Version component embedded in the prebuilt toolchain directory name.
GCC_VERSION = "6.3.0"
# Cache of already-resolved DSO paths, filled in by find_dso_full_path().
name_to_full_path = {}
# Set from the --debug flag in main(); enables extra diagnostic prints.
debug_mode = False


def find_func(find_args, dirname, names):
    """Directory-visit callback that records where a wanted entry lives.

    Args:
        find_args: dict with a "name" key (entry name being searched for)
            and a "path" key (empty string until a match is recorded).
        dirname: path of the directory currently being visited.
        names: names of the entries inside |dirname|.

    On the first directory whose entries include find_args["name"],
    find_args["path"] is set to |dirname|.  Later calls leave that result
    untouched, and directories whose path contains "sysroot" are ignored.
    """
    already_found = find_args["path"] != ""
    if already_found or "sysroot" in dirname:
        return
    if find_args["name"] in names:
        find_args["path"] = dirname


def find_file_in_build_dir(name, build_dirs):
    """Search the build directories for an entry called |name|.

    The directories in |build_dirs| are walked in order.  The matching
    rules are the same as find_func(): an entry (file or directory) whose
    name equals |name| matches, and any directory whose path contains
    "sysroot" is skipped.

    Args:
        name: bare entry name to look for.  A None or empty name cannot
            match anything, so it returns None without touching the disk.
        build_dirs: iterable of directory paths to search, in priority
            order.  Directories that do not exist are silently skipped.

    Returns:
        The absolute path of the first match, or None if there is none.
    """
    if not name:
        return None
    for location in build_dirs:
        # os.walk() works on both Python 2 and 3 (os.path.walk() was
        # removed in Python 3) and lets us stop as soon as a match is found.
        for dirpath, dirnames, filenames in os.walk(location):
            if "sysroot" in dirpath:
                # Everything below also has "sysroot" in its path, so there
                # is no point in descending any further.
                del dirnames[:]
                continue
            if name in dirnames or name in filenames:
                return os.path.abspath(os.path.join(dirpath, name))
    return None


def buildid_to_full_path(buildid, build_dirs):
    """Look up the file recorded for |buildid| in the ids.txt files.

    Each directory in |build_dirs| may contain an "ids.txt" file whose
    lines have the form "<build-id> <path>".  Directories are consulted in
    order and the first matching entry wins.

    Args:
        buildid: build id string to look for.
        build_dirs: iterable of directories that may hold an ids.txt.

    Returns:
        The path recorded for |buildid| (exactly as written in ids.txt,
        minus surrounding whitespace), or None when no file lists it.
    """
    for build_dir in build_dirs:
        id_file_path = os.path.join(build_dir, "ids.txt")
        if not os.path.exists(id_file_path):
            continue
        with open(id_file_path) as id_file:
            for line in id_file:
                # Split only on the first run of whitespace so that paths
                # containing spaces survive; blank or malformed lines are
                # skipped instead of aborting the whole lookup.
                fields = line.strip().split(None, 1)
                if len(fields) != 2:
                    continue
                entry_id, path = fields
                if entry_id == buildid:
                    return path
    return None


def find_file_in_boot_manifest(boot_app_name, build_dirs):
    """Map a bootfs path to the host file it was built from.

    Locates a "bootfs.manifest" file in |build_dirs| and scans its
    "<path in bootfs>=<path on host>" lines for |boot_app_name|.

    Args:
        boot_app_name: path of the program inside bootfs (the manifest's
            left-hand side).
        build_dirs: directories searched for the manifest.

    Returns:
        The host path from the matching line, with a trailing ".strip"
        removed so that the unstripped binary is referenced, or None when
        there is no manifest or no matching entry.
    """
    manifest_path = find_file_in_build_dir("bootfs.manifest", build_dirs)
    if not manifest_path:
        return None

    strip_suffix = ".strip"
    with open(manifest_path) as manifest_file:
        for line in manifest_file:
            # partition() splits on the first "=" only, so blank lines and
            # lines without "=" are skipped rather than raising ValueError,
            # and a host path that itself contains "=" is kept intact.
            out_path, separator, in_path = line.rstrip().partition("=")
            if not separator or out_path != boot_app_name:
                continue
            if in_path.endswith(strip_suffix):
                in_path = in_path[:-len(strip_suffix)]
            return in_path
    return None


def find_dso_full_path_uncached(dso, exe_name, name_to_buildid, build_dirs):
    """Resolve the on-disk path of a DSO named in a backtrace.

    Lookup order:
      1. The build id recorded for |dso| in |name_to_buildid|, resolved
         through the ids.txt files.
      2. For "app" / "app:..." names: the executable given on the command
         line, then the boot manifest for "app:/boot/..." names.
      3. Otherwise: an exact file-name match, then (for names that do not
         end in ".so") the command-line executable, then the last path
         component of an absolute |dso| path.

    Args:
        dso: name of the module as printed in the backtrace.
        exe_name: name of the primary application, or None if unknown.
        name_to_buildid: dict mapping module names to build ids.
        build_dirs: directories to search.

    Returns:
        The path of the file to symbolize against, or None if not found.
    """
    if dso in name_to_buildid:
        found_path = buildid_to_full_path(name_to_buildid[dso], build_dirs)
        if found_path:
            return found_path
        # This can be a bug in the generation of the ids.txt file, report it.
        # It's not necessarily a bug though, so for now only report in debug mode.
        if debug_mode:
            print("WARNING: Unable to find %s in any ids.txt file." % dso)

    # The name 'app' indicates the real app name is unknown.
    # If the process has a name property that will be printed, but
    # it has a max of 32 characters so it may be insufficient.
    # Crashlogger prefixes such names with "app:" for our benefit.
    if dso == "app" or dso.startswith("app:"):
        # If an executable was passed on the command-line, try using that
        if exe_name:
            found_path = find_file_in_build_dir(exe_name, build_dirs)
            if found_path:
                return found_path

        # If this looks like a program in boot fs, consult the manifest
        boot_prefix = "app:/boot/"
        if dso.startswith(boot_prefix):
            boot_app_name = dso[len(boot_prefix):]
            found_path = find_file_in_boot_manifest(boot_app_name, build_dirs)
            if found_path:
                return found_path
        return None

    # First, try an exact match for the filename
    found_path = find_file_in_build_dir(dso, build_dirs)
    if not found_path and exe_name and not dso.endswith(".so"):
        # If that fails, and this file doesn't end with .so, try the
        # executable name (when one was given; searching for None would
        # only walk every build directory for nothing).
        found_path = find_file_in_build_dir(exe_name, build_dirs)
    if not found_path and dso.startswith("/"):
        # If that still fails and this looks like an absolute path, try the
        # last path component.  The separator itself must be dropped:
        # directory entries never start with "/", so a name like
        # "/libfoo.so" could never match.
        short_name = os.path.basename(dso)
        if short_name:
            found_path = find_file_in_build_dir(short_name, build_dirs)
    return found_path


def find_dso_full_path(dso, exe_name, name_to_buildid, build_dirs):
    """Cached wrapper around find_dso_full_path_uncached().

    Successful lookups are remembered in the module-level
    name_to_full_path dict.  The cache key combines the module name with
    the build id currently recorded for it (None when there is none):
    generic names such as "app" are reused by every process, so keying on
    the name alone would hand a later crash the binary resolved for an
    earlier, different process.

    Args:
        dso: name of the module as printed in the backtrace.
        exe_name: name of the primary application, or None if unknown.
        name_to_buildid: dict mapping module names to build ids.
        build_dirs: directories to search.

    Returns:
        The resolved path, or None if the module could not be found.
        Failed lookups are not cached.
    """
    cache_key = (dso, name_to_buildid.get(dso))
    if cache_key in name_to_full_path:
        return name_to_full_path[cache_key]
    found_path = find_dso_full_path_uncached(dso, exe_name, name_to_buildid, build_dirs)
    if found_path:
        name_to_full_path[cache_key] = found_path
    return found_path


def tool_path(arch, tool):
    """Build the path of a prebuilt toolchain binary for this host.

    Args:
        arch: architecture prefix used in the toolchain name.
        tool: tool name, e.g. "addr2line" or "gdb".

    Returns:
        "<PREBUILTS_BASE_DIR>/<arch>-elf-<GCC_VERSION>-<host>-x86_64/bin/<arch>-elf-<tool>",
        where <host> is "Linux" or "Darwin" according to sys.platform.

    Raises:
        Exception: if sys.platform is neither Linux nor Darwin.
    """
    host = None
    for platform_prefix, host_label in (("linux", "Linux"), ("darwin", "Darwin")):
        if sys.platform.startswith(platform_prefix):
            host = host_label
            break
    if host is None:
        raise Exception("Unsupported platform!")

    toolchain_dir = "%s-elf-%s-%s-x86_64" % (arch, GCC_VERSION, host)
    binary_name = "%s-elf-%s" % (arch, tool)
    return "%s/%s/bin/%s" % (PREBUILTS_BASE_DIR, toolchain_dir, binary_name)


def run_tool(arch, tool, *args):
    """Run a prebuilt toolchain binary and capture its standard output.

    Args:
        arch: architecture prefix passed to tool_path().
        tool: tool name passed to tool_path().
        *args: command-line arguments for the tool.

    Returns:
        The tool's standard output as text, or False if the tool could not
        be run or exited with a non-zero status (the failure is reported
        on stdout).

    Raises:
        Exception: propagated from tool_path() on unsupported platforms.
    """
    cmd = [tool_path(arch, tool)] + list(args)
    if debug_mode:
        print("Running: %s" % " ".join(cmd))
    try:
        # universal_newlines=True makes the result text (not bytes) on
        # Python 3 as well, so callers can splice it into "%s" formats.
        return subprocess.check_output(cmd, universal_newlines=True)
    except Exception as e:
        # Best effort: symbolization carries on without this tool's output.
        print("Calling %s failed: command %s error %s" % (tool, cmd, e))
        return False


# addr2line only accepts hexadecimal addresses, and |addr_as_hex_string|
# must already have had the PIE adjustment applied.
def run_addr2line(arch, elf_path, addr_as_hex_string):
    """Run addr2line on one address of |elf_path|.

    Returns whatever run_tool() returns: the tool's output on success or
    False on failure.
    """
    addr2line_flags = "-Cipfe"
    return run_tool(arch, "addr2line", addr2line_flags, elf_path,
                    addr_as_hex_string)


def get_call_location(arch, elf_path, addr_as_hex_string):
    """Symbolize the call site that produced a return address.

    Args:
        arch: architecture prefix of the toolchain to use.
        elf_path: ELF file to symbolize against.
        addr_as_hex_string: return address, as a hexadecimal string.

    Returns:
        The result of run_addr2line() for the adjusted address.
    """
    return_addr = int(addr_as_hex_string, 16)
    # Step back one byte so the address falls inside the call instruction
    # rather than on the instruction after it.  This gives more correct
    # results in the presence of inlining and 'noreturn' functions.
    # (See ZX-842.)
    call_site = return_addr - 1
    return run_addr2line(arch, elf_path, "0x%x" % call_site)


# Writing to stdout can fail with EAGAIN on BSD platforms; when that happens
# the line is written again. This has only been seen when qemu's stdout is
# piped directly into this script.
def writelines(lines):
    """Write every string in |lines| to stdout through writeline()."""
    for text in lines:
        writeline(text)


def writeline(line):
    """Write |line| to stdout, retrying for as long as the write hits EAGAIN.

    NOTE(review): an IOError with any other errno ends the loop without
    being reported, so the line is silently dropped - confirm this
    best-effort behavior is intended.
    """
    finished = False
    while not finished:
        try:
            sys.stdout.write(line)
            finished = True
        except IOError as err:
            # Only EAGAIN is worth another attempt; anything else gives up.
            finished = err.errno != errno.EAGAIN


def main():
    """Symbolize backtraces found in crash-log text.

    Reads the input (a file or stdin) line by line, optionally echoing it,
    and recognizes:
      * "arch: ..." lines, which select the toolchain architecture;
      * "dso: id=... base=... name=..." lines, which record the build id
        and load base of each module of the crashing process;
      * user-space "bt#N: pc ... sp ... (dso,offset)" frames;
      * kernel panics ("ZIRCON KERNEL PANIC", "RIP: ...", "bt#N: 0x...").
    When a "bt#N: end" line is reached the symbolized stack collected so
    far is printed, followed by a warning if the summed frame sizes come
    close to, or exceed, --stack_size.

    Returns:
        None (the script therefore exits with status 0).
    """
    global debug_mode

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("--file", "-f", nargs="?", type=argparse.FileType("r"),
                        default=sys.stdin,
                        help="File to read from, stdin by default")
    parser.add_argument("--build-dir", "-b", nargs="*",
                        help="List of build directories to search instead of"
                             " the default Fuchsia build directory"
                             " (out/<variant>-<arch>)")
    parser.add_argument("--disassemble", "-d", action="store_true",
                        help="Show disassembly of each function")
    parser.add_argument("--stack_size", "-s", type=int,
                        default=256*1024,
                        help="Change the assumed size of the stack (e.g. use 1048576 for ftl or mtl default thread size")
    parser.add_argument("--echo", dest="echo", action="store_true",
                        help="Echo lines of input (on by default)")
    parser.add_argument("--no-echo", dest="echo", action="store_false",
                        help="Don't echo lines of input")
    parser.add_argument("--debug", "-D", action="store_true",
                        help="Print messages for debugging symbolize")
    parser.add_argument("app", nargs="?", help="Name of primary application")
    parser.set_defaults(echo=True)
    args = parser.parse_args()
    debug_mode = args.debug

    zircon_build_dir = os.path.join(
        os.path.dirname(SCRIPT_DIR), "build-x86")
    if not os.path.exists(zircon_build_dir):
        zircon_build_dir = os.path.join(
            os.path.dirname(SCRIPT_DIR), os.pardir, "out", "build-zircon", "build-x86")
    build_dirs = [zircon_build_dir]
    if args.build_dir:
        build_dirs = args.build_dir + build_dirs
    else:
        # Put the unstripped path ahead of the stripped one, we want the
        # former searched first. This does mean the unstripped directory
        # will get searched twice, but relative to the entire search time,
        # the addition is small.
        # Plus once a file is found its location is cached.
        # Plus this is only used as a fallback in case the file isn't found
        # in ids.txt.
        fuchsia_build_dir = os.path.abspath(os.path.join(
            os.path.dirname(SCRIPT_DIR), os.pardir, "out", "debug-x86-64"))
        fuchsia_unstripped_dir = os.path.join(fuchsia_build_dir,
            "exe.unstripped")
        build_dirs = [fuchsia_unstripped_dir, fuchsia_build_dir] + build_dirs

    # Parsing vars
    arch = "x86_64"
    name_to_buildid = {}
    # Load base of each module, keyed like name_to_buildid.  Kept per module
    # so that disassembly addresses are biased by the base of the module the
    # frame belongs to, not by whichever module happened to be listed last.
    name_to_bias = {}
    processed_lines = []
    prev_sp = None
    prev_frame_num = None
    frame_sizes = []
    total_stack_size = 0
    # If True and we see a dso line, start over collecting the list.
    done_dso_list = True

    # Regex for parsing
    # Either nothing, or something like "[00007.268] 00304.00325> "
    full_prefix = r"^(|\[\d+\.\d+\] \d+\.\d+> )"
    btre = re.compile(full_prefix + r"bt#(\d+):")
    bt_with_offset_re = re.compile(full_prefix +
        r"bt#(\d+): pc 0x[0-9a-f]+ sp (0x[0-9a-f]+) \(([^,]+),(0x[0-9a-f]+)\)$")
    bt_end_re = re.compile(full_prefix + r"bt#(\d+): end")
    arch_re = re.compile(full_prefix + r"arch: ([\S]+)$")
    build_id_re = re.compile(full_prefix +
        r"dso: id=([0-9a-z]+) base=(0x[0-9a-f]+) name=(.+)$")
    disasm_re = re.compile(r"^ *(0x[0-9a-f]+)( .+)$")

    # Zircon backtraces
    zircon_crash_re = re.compile(full_prefix + r"ZIRCON KERNEL PANIC$")
    # TODO(cja): Add ARM to the regex
    zircon_pc_re = re.compile(r"RIP: (0x[0-9a-z]+)")
    zircon_bt_re = re.compile(full_prefix +
        r"bt#(\d+): (0x[0-9a-fA-F]+)$")
    zircon_elf_path = ''
    zircon_bt = False
    zircon_pc = ''

    def align_inlined(prefix, location):
        # In the case of inlined methods, it is more readable if the
        # inlined lines are aligned to be to the right of "=>".
        return prefix + location.replace(
            "(inlined", (" " * len(prefix)) + "(inlined")

    while True:
        line = args.file.readline()
        if args.echo and not args.file.isatty():
            writeline(line)
        end_of_file = (line == '')
        # Strip any trailing carriage return character ("\r"), since
        # these appear in the serial console output from QEMU.
        line = line.rstrip()
        if bt_end_re.match(line):
            if processed_lines:
                writeline("\nstart of symbolized stack:\n")
                writelines(processed_lines)
                writeline("end of symbolized stack\n")

                # Since we don't have the last stack frame, leave some leeway
                # when warning about stackoverflow
                if total_stack_size > args.stack_size - 8*1024:
                    if total_stack_size >= args.stack_size:
                        message = "Overflowed stack"
                    else:
                        message = "Potentially overflowed stack"
                    writeline("WARNING: %s (total usage: %d, stack size: %d)\n" % (message, total_stack_size, args.stack_size))
                    for frame, size in frame_sizes:
                        writeline("#%s: %d bytes\n" % (frame, size))
            # The backtrace is over: drop all per-backtrace state, even when
            # nothing was symbolized, so it cannot leak into the next one.
            processed_lines = []
            frame_sizes = []
            total_stack_size = 0
            prev_sp = None
            prev_frame_num = None
            zircon_bt = False
            continue

        m = arch_re.match(line)
        if m:
            arch = m.group(2)
            continue
        m = build_id_re.match(line)
        if m:
            if done_dso_list:
                name_to_buildid = {}
                name_to_bias = {}
                done_dso_list = False
            buildid, base, name = m.group(2, 3, 4)
            name_to_buildid[name] = buildid
            name_to_bias[name] = int(base, 16)
            continue
        # We didn't see a dso line, so we're done with this list.
        # The next time we see one means we're starting a new list.
        done_dso_list = True
        m = btre.match(line)
        if m and not zircon_bt:
            frame_num = m.group(2)
            m = bt_with_offset_re.match(line)
            if m:
                sp = int(m.group(3), 16)
                if prev_sp is not None:
                    # The distance between consecutive stack pointers is
                    # the size of the previous (inner) frame.
                    frame_size = sp - prev_sp
                    total_stack_size += frame_size
                    frame_sizes.append((prev_frame_num, frame_size))
                prev_sp = sp
                prev_frame_num = frame_num

                dso = m.group(4)
                off = m.group(5)
                dso_full_path = find_dso_full_path(
                    dso, args.app, name_to_buildid, build_dirs)
                if dso_full_path:
                    call_loc = get_call_location(arch, dso_full_path, off)
                    if call_loc:
                        processed_lines.append(
                            "#%s: %s" % (frame_num, call_loc))
                    if args.disassemble:
                        pc = int(off, 16)
                        bias = name_to_bias.get(dso, 0)
                        disassembly = run_tool(
                            arch, "gdb", "--nx", "--batch", "-ex",
                            "disassemble %#x" % pc, dso_full_path)
                        if disassembly:
                            for asm_line in disassembly.splitlines():
                                asm_match = disasm_re.match(asm_line)
                                if asm_match:
                                    # Rewrite instruction lines with the
                                    # runtime address, marking the one at pc.
                                    addr, rest = asm_match.groups()
                                    addr = int(addr, 16)
                                    marker = "=> " if addr == pc else "   "
                                    asm_line = "%s%#.16x%s" % (
                                        marker, bias + addr, rest)
                                processed_lines.append(asm_line + "\n")
                    continue
                else:
                    # can't find dso_full_path
                    processed_lines.append("#%s: unknown, can't find full path for %s\n" % (frame_num, dso))
            else:
                # bt_with_offset_re failed
                processed_lines.append("#%s: unknown, can't find pc, sp or app/library in line\n" % (frame_num,))

        # Zircon Specific Handling
        if zircon_crash_re.search(line):
            zircon_elf_path = find_file_in_build_dir("zircon.elf", build_dirs)
            if zircon_elf_path:
                zircon_bt = True
            else:
                sys.stderr.write("Symbolize could not find the zircon elf binary. Perhaps you need to build "
                                  "zircon or specify the build directory with -b?\n")
            continue

        m = zircon_pc_re.search(line)
        if m:
            zircon_pc = m.group(1)
            continue

        m = zircon_bt_re.match(line)
        if m and zircon_bt:
            frame_num = m.group(2)
            addr = m.group(3)
            # If we saw the instruction pointer for the fault/panic then use it once
            if zircon_pc:
                a2l_out = run_addr2line(arch, zircon_elf_path, zircon_pc)
                # run_addr2line() returns False when the tool fails.
                if a2l_out:
                    processed_lines.append(
                        align_inlined("   pc: %s => " % zircon_pc, a2l_out))
                zircon_pc = None

            call_loc = get_call_location(arch, zircon_elf_path, addr)
            if call_loc:
                processed_lines.append(
                    align_inlined("bt#%s: %s => " % (frame_num, addr), call_loc))
            continue

        # Exit if we've reached the end of stdin
        if end_of_file:
            break

# Run the symbolizer only when this file is executed as a script; the value
# returned by main() becomes the process exit status.
if __name__ == '__main__':
    sys.exit(main())
